library(vars)
## Loading required package: MASS
## Loading required package: strucchange
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: urca
## Loading required package: lmtest
library(magrittr)
library(tidyquant)
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
## Loading required package: quantmod
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
## Loading required package: tidyverse
## -- Attaching packages --------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 3.1.0     √ purrr   0.2.5
## √ tibble  2.0.1     √ dplyr   0.7.8
## √ tidyr   0.8.2     √ stringr 1.3.1
## √ readr   1.3.1     √ forcats 0.3.0
## -- Conflicts ------------------------------------------------ tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x stringr::boundary()      masks strucchange::boundary()
## x lubridate::date()        masks base::date()
## x tidyr::extract()         masks magrittr::extract()
## x dplyr::filter()          masks stats::filter()
## x dplyr::first()           masks xts::first()
## x lubridate::intersect()   masks base::intersect()
## x dplyr::lag()             masks stats::lag()
## x dplyr::last()            masks xts::last()
## x dplyr::select()          masks MASS::select()
## x purrr::set_names()       masks magrittr::set_names()
## x lubridate::setdiff()     masks base::setdiff()
## x lubridate::union()       masks base::union()
library(timetk)
library(egg)
## Loading required package: gridExtra
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(grid)
library(gridExtra)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(urca)

# Global ggplot theme: black-and-white base, facet strip labels aligned
# left (horizontal) / right (vertical), and no strip background box.
theme_set(
    theme_bw() +
        theme(strip.background = element_blank(),
              strip.text.x = element_text(hjust = 0),
              strip.text.y = element_text(hjust = 1))
)

Problem 1

The response of hours worked to different shocks has been studied extensively since Gali (1999), who argued that hours worked show a decline in response to a positive technology shock. In this problem, you will replicate some of his results.

  1. First, use tq_get to obtain the following two quarterly time series for the period 1947Q1-2017Q4 from FRED: labor productivity, measured as Nonfarm Business Sector: Real Output Per Hour of All Persons OPHNFB and for total hours worked, measured as Nonfarm Business Sector: Hours of All Persons HOANBS.
# Download both quarterly FRED series ONCE and compute per symbol:
#   ly  = log level, dly = quarterly log difference (growth rate).
# The original code downloaded the same data twice (once for levels,
# once for differences) and joined the results; a single download with
# one reshape is sufficient and avoids the duplicated pipeline.
labor_tbl <-
    tq_get(c("OPHNFB", "HOANBS"),        # OPHNFB: real output per hour; HOANBS: hours of all persons
           get = "economic.data", from = "1947-01-01", to = "2017-12-31") %>%
    # group explicitly by symbol so lag() cannot leak across series boundaries
    group_by(symbol) %>%
    mutate(yearq = as.yearqtr(date),
           ly  = log(price),
           dly = ly - lag(ly)) %>%
    ungroup() %>%
    select(yearq, symbol, ly, dly) %>%
    # long format, then build column names lOPHNFB, lHOANBS, dlOPHNFB, dlHOANBS
    gather(measure, value, ly, dly) %>%
    mutate(key = str_c(if_else(measure == "ly", "l", "dl"), symbol)) %>%
    select(yearq, key, value) %>%
    spread(key, value)
labor_tbl
  1. Test the log of real output per hour \(y_{1,t} = \log OPHNFB_t\) and the log of hours \(y_{2,t} = \log HOANBS_t\) for the presence of unit root using ERS test. Afterwards apply the ERS unit root test also to the first differences, \(\Delta y_{1,t}\) and \(\Delta y_{2,t}\). Comment on results.
## Unit root tests for the log levels.
## NOTE: ur.ers() is a UNIVARIATE test. Passing a 2-column ts stacks both
## series into one long vector and produces a meaningless statistic (visible
## in the original output: residual min of -1.55 and 558 df, ~2x the sample).
## Each series must therefore be tested separately.
labor_tbl %>% tk_ts(select = "lOPHNFB") %>%
    ur.ers(type = "DF-GLS", model = "trend") %>% summary()
labor_tbl %>% tk_ts(select = "lHOANBS") %>%
    ur.ers(type = "DF-GLS", model = "trend") %>% summary()
## 
## ############################################### 
## # Elliot, Rothenberg and Stock Unit Root Test # 
## ############################################### 
## 
## Test of type DF-GLS 
## detrending of series with intercept and trend 
## 
## 
## Call:
## lm(formula = dfgls.form, data = data.dfgls)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.55129 -0.00220  0.00508  0.01013  0.03116 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## yd.lag       -0.012610   0.006845  -1.842    0.066 .
## yd.diff.lag1 -0.003223   0.042354  -0.076    0.939  
## yd.diff.lag2  0.031691   0.042334   0.749    0.454  
## yd.diff.lag3 -0.017609   0.042348  -0.416    0.678  
## yd.diff.lag4  0.003470   0.042348   0.082    0.935  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06644 on 558 degrees of freedom
## Multiple R-squared:  0.007396,   Adjusted R-squared:  -0.001498 
## F-statistic: 0.8315 on 5 and 558 DF,  p-value: 0.5275
## 
## 
## Value of test-statistic is: -1.8422 
## 
## Critical values of DF-GLS are:
##                  1pct  5pct 10pct
## critical values -3.48 -2.89 -2.57
## Unit root tests for the first differences.
## As above, ur.ers() is univariate, so each differenced series is
## tested on its own rather than passing a 2-column ts object.
labor_tbl %>% tk_ts(select = "dlOPHNFB") %>%
    ur.ers(type = "DF-GLS", model = "trend") %>% summary()
labor_tbl %>% tk_ts(select = "dlHOANBS") %>%
    ur.ers(type = "DF-GLS", model = "trend") %>% summary()
## 
## ############################################### 
## # Elliot, Rothenberg and Stock Unit Root Test # 
## ############################################### 
## 
## Test of type DF-GLS 
## detrending of series with intercept and trend 
## 
## 
## Call:
## lm(formula = dfgls.form, data = data.dfgls)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.047828 -0.006995 -0.002585  0.003158  0.045703 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## yd.lag       -0.005081   0.001129  -4.499 8.32e-06 ***
## yd.diff.lag1 -0.493093   0.042135 -11.703  < 2e-16 ***
## yd.diff.lag2 -0.122739   0.046729  -2.627  0.00886 ** 
## yd.diff.lag3 -0.052138   0.042426  -1.229  0.21961    
## yd.diff.lag4 -0.010794   0.011514  -0.937  0.34894    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.009715 on 557 degrees of freedom
## Multiple R-squared:  0.2196, Adjusted R-squared:  0.2126 
## F-statistic: 31.34 on 5 and 557 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -4.4987 
## 
## Critical values of DF-GLS are:
##                  1pct  5pct 10pct
## critical values -3.48 -2.89 -2.57

Comments: 1) From the ERS unit root test on the log levels, we fail to reject the null hypothesis of a unit root, so the log levels appear to be nonstationary. 2) From the ERS unit root test on the first differences, we reject the null hypothesis of a unit root, so the first differences are stationary — i.e. the series are difference stationary (I(1)), not trend stationary. We therefore use the first-differenced data in the VAR.

  1. Estimate a bivariate reduced form VAR for \(\mathbf y_t = (\Delta y_{1,t}, \Delta y_{2,t})'\), using AIC information criteria to select number of lags.
# Convert the first-differenced data into a quarterly ts object.
# dlOPHNFB (productivity growth) is ordered first, dlHOANBS (hours growth)
# second; the first NA row created by differencing is dropped.
var_data <-
    labor_tbl %>%
    dplyr::select(yearq, dlHOANBS, dlOPHNFB) %>%
    drop_na() %>%
    tk_ts(select = c("dlOPHNFB", "dlHOANBS"), start = .$yearq[1], frequency = 4)

# Estimate the reduced-form VAR(p) with an intercept;
# the lag order p is chosen by AIC with at most 8 lags considered.
varp <- VAR(var_data, type = "const", lag.max = 8, ic = "AIC")
varp
## 
## VAR Estimation Results:
## ======================= 
## 
## Estimated coefficients for equation dlOPHNFB: 
## ============================================= 
## Call:
## dlOPHNFB = dlOPHNFB.l1 + dlHOANBS.l1 + dlOPHNFB.l2 + dlHOANBS.l2 + dlOPHNFB.l3 + dlHOANBS.l3 + const 
## 
##  dlOPHNFB.l1  dlHOANBS.l1  dlOPHNFB.l2  dlHOANBS.l2  dlOPHNFB.l3 
## -0.082176486  0.067209114  0.062375949 -0.196434099 -0.007676074 
##  dlHOANBS.l3        const 
## -0.193686165  0.006384977 
## 
## 
## Estimated coefficients for equation dlHOANBS: 
## ============================================= 
## Call:
## dlHOANBS = dlOPHNFB.l1 + dlHOANBS.l1 + dlOPHNFB.l2 + dlHOANBS.l2 + dlOPHNFB.l3 + dlHOANBS.l3 + const 
## 
##   dlOPHNFB.l1   dlHOANBS.l1   dlOPHNFB.l2   dlHOANBS.l2   dlOPHNFB.l3 
##  0.1099823203  0.6219060919  0.1043853139 -0.0135115847  0.0824276615 
##   dlHOANBS.l3         const 
## -0.0445133802 -0.0002505716
summary(varp)
## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: dlOPHNFB, dlHOANBS 
## Deterministic variables: const 
## Sample size: 280 
## Log Likelihood: 1964.589 
## Roots of the characteristic polynomial:
## 0.7327 0.7327 0.424 0.424 0.411 0.411
## Call:
## VAR(y = labor.ts, type = "const", lag.max = 8, ic = "AIC")
## 
## 
## Estimation results for equation dlOPHNFB: 
## ========================================= 
## dlOPHNFB = dlOPHNFB.l1 + dlHOANBS.l1 + dlOPHNFB.l2 + dlHOANBS.l2 + dlOPHNFB.l3 + dlHOANBS.l3 + const 
## 
##               Estimate Std. Error t value Pr(>|t|)    
## dlOPHNFB.l1 -0.0821765  0.0587519  -1.399   0.1630    
## dlHOANBS.l1  0.0672091  0.0704919   0.953   0.3412    
## dlOPHNFB.l2  0.0623759  0.0555624   1.123   0.2626    
## dlHOANBS.l2 -0.1964341  0.0829575  -2.368   0.0186 *  
## dlOPHNFB.l3 -0.0076761  0.0547219  -0.140   0.8885    
## dlHOANBS.l3 -0.1936862  0.0723749  -2.676   0.0079 ** 
## const        0.0063850  0.0007503   8.510 1.17e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.007947 on 273 degrees of freedom
## Multiple R-Squared: 0.1192,  Adjusted R-squared: 0.09987 
## F-statistic: 6.159 on 6 and 273 DF,  p-value: 4.484e-06 
## 
## 
## Estimation results for equation dlHOANBS: 
## ========================================= 
## dlHOANBS = dlOPHNFB.l1 + dlHOANBS.l1 + dlOPHNFB.l2 + dlHOANBS.l2 + dlOPHNFB.l3 + dlHOANBS.l3 + const 
## 
##               Estimate Std. Error t value Pr(>|t|)    
## dlOPHNFB.l1  0.1099823  0.0504245   2.181   0.0300 *  
## dlHOANBS.l1  0.6219061  0.0605005  10.279   <2e-16 ***
## dlOPHNFB.l2  0.1043853  0.0476870   2.189   0.0294 *  
## dlHOANBS.l2 -0.0135116  0.0711992  -0.190   0.8496    
## dlOPHNFB.l3  0.0824277  0.0469657   1.755   0.0804 .  
## dlHOANBS.l3 -0.0445134  0.0621165  -0.717   0.4742    
## const       -0.0002506  0.0006440  -0.389   0.6975    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.00682 on 273 degrees of freedom
## Multiple R-Squared: 0.4312,  Adjusted R-squared: 0.4186 
## F-statistic: 34.49 on 6 and 273 DF,  p-value: < 2.2e-16 
## 
## 
## 
## Covariance matrix of residuals:
##           dlOPHNFB  dlHOANBS
## dlOPHNFB 6.315e-05 5.958e-06
## dlHOANBS 5.958e-06 4.652e-05
## 
## Correlation matrix of residuals:
##          dlOPHNFB dlHOANBS
## dlOPHNFB   1.0000   0.1099
## dlHOANBS   0.1099   1.0000

Comments: Using AIC, the selected number of lags is 3, which is consistent with the VARselect result.

  1. Suppose that we want to analyze effects of two types of shocks - technology shocks and demand shocks on hours worked. Use Blanchard and Quah approach to obtain an SVAR model where we impose the condition that demand shocks do not affect real output per hour (i.e. labor productivity) \(y_{1,t}\) in the long run.
  2. Report and interpret the contemporaneous impact and the long run impact matrices for the SVAR.
# Blanchard-Quah identification: impose the long-run restriction that the
# second structural shock (demand) has zero cumulative long-run effect on
# the first variable (labor productivity growth). This makes the long-run
# impact matrix lower triangular and identifies the two structural shocks.
mod_svar <- BQ(varp)
summary(mod_svar)
## 
## SVAR Estimation Results:
## ======================== 
## 
## Call:
## BQ(x = varp)
## 
## Type: Blanchard-Quah 
## Sample size: 280 
## Log Likelihood: 1957.5 
## 
## Estimated contemporaneous impact matrix:
##           dlOPHNFB dlHOANBS
## dlOPHNFB  0.006574 0.004464
## dlHOANBS -0.003188 0.006029
## 
## Estimated identified long run impact matrix:
##           dlOPHNFB dlHOANBS
## dlOPHNFB  0.007164  0.00000
## dlHOANBS -0.002435  0.01382
## 
## Covariance matrix of reduced form residuals (*100):
##           dlOPHNFB  dlHOANBS
## dlOPHNFB 0.0063148 0.0005958
## dlHOANBS 0.0005958 0.0046516

Comments: (1) From the estimated identified long-run impact matrix, the long-run cumulative effect of a demand shock (dlHOANBS) on productivity (dlOPHNFB) is 0, which is consistent with the identifying assumption. (2) From the contemporaneous impact matrix, a one-standard-deviation positive technology shock increases productivity by 0.006574 log points on impact and lowers hours worked by 0.003188 log points; a one-standard-deviation positive demand shock increases productivity by 0.004464 log points and increases hours worked by 0.006029 log points.

  1. Plot the cumulative IRFs based on the SVAR model from (d) and interpret them - explain what say about the effects of the two types of shocks on labor productivity and hours worked.
# Arrange IRF data into a tibble to be used with ggplot, then plot.
# Following Gali (1999): productivity responses are cumulated (so they show
# the effect on the log LEVEL of productivity), while the hours responses
# are taken from the non-cumulative IRFs here — NOTE(review): Gali's Figure 2
# plots hours in levels too; confirm whether hours should also be cumulated.
# standard non-cumulative IRFs with 90% bootstrap confidence bands
svar_irf <- irf(mod_svar, n.ahead = 40, ci = .9)
# cumulative IRFs
svar_irf_c <- irf(mod_svar, n.ahead = 40, ci = .9, cumulative = TRUE)

svar_irf_tbl <-
    bind_rows(# non-cumulative IRFs: keep only the dlHOANBS (hours) responses
              svar_irf %>%     ## for fully cumulative IRFs use svar_irf_c here
                  keep(names(.) %in% c("irf", "Lower", "Upper")) %>%
                  modify_depth(2, as_tibble) %>%
                  modify_depth(1, bind_rows, .id = "impulse") %>%
                  map_df(bind_rows, .id = "key") %>%
                  dplyr::select(-dlOPHNFB) %>%
                  gather(response, value, -key, -impulse),
              # cumulative IRFs: keep only the dlOPHNFB (productivity) responses
              svar_irf_c %>%
                  keep(names(.) %in% c("irf", "Lower", "Upper")) %>%
                  modify_depth(2, as_tibble) %>%
                  modify_depth(1, bind_rows, .id = "impulse") %>%
                  map_df(bind_rows, .id = "key") %>%
                  dplyr::select(-dlHOANBS) %>%
                  gather(response, value, -key, -impulse)) %>%
    group_by(key, impulse, response) %>%
    # horizon index within each (key, impulse, response) series
    mutate(lag = row_number()) %>%
    ungroup() %>%
    # change signs for the non-technology shock IRFs so that they show effects of a positive shock, not a negative one
    mutate(value = if_else(impulse == "dlHOANBS", -value, value)) %>%
    # one column each for irf, Lower, Upper
    spread(key, value)

# label shocks by their structural interpretation and facet by response
g <- svar_irf_tbl %>%
    mutate(impulse_label = case_when(impulse == "dlOPHNFB" ~ 1,
                                    impulse == "dlHOANBS"     ~ 2) %>% factor(labels = c("technology shock","non-technology shock")),
           response_label = case_when(response == "dlOPHNFB" ~ "dly1",
                                      response == "dlHOANBS" ~ "dly2") ) %>%
    ggplot(aes(x = lag, y = irf)) +
        # shaded 90% confidence band
        geom_ribbon(aes(x = lag, ymin = Lower, ymax = Upper), fill = "gray50", alpha = .3) +
        geom_line() +
        geom_hline(yintercept = 0, linetype = "dashed") +
        labs(x = "", y = "", title = "SVAR Impulse Response Functions") +
        facet_grid(response_label ~ impulse_label, switch = "y", scales = "free_y")
g

# interactive version of the same plot using plotly
library(plotly)
ggplotly(g)

Comments: The SVAR impulse response functions show that the technology shock significantly affects dly1 (productivity), while its influence on hours worked lasts only about 3 quarters. The demand shock significantly affects both productivity and hours worked for about 5 quarters.

  1. Compare your IRFs with Figure 2 from Gali (1999) AER. Comments: My IRFs are roughly consistent with Figure 2 from Gali (1999).

  2. Construct the FEVD for the SVAR model from (d). How much of the overall fluctuations in \(\Delta y_{1,t}\) and \(\Delta y_{2,t}\) is explained in the short run by the two shocks? How about in the long run?

#### FEVD ####

# Short run: forecast error variance decomposition of the reduced-form VAR,
# 40 quarters ahead, plotted with the built-in vars plot method.
par(mar = c(4, 5, 2, 1))
mod_fevd <- fevd(varp, n.ahead = 40)
plot(mod_fevd, addbars = 3)

# Plot a forecast error variance decomposition (a vars::fevd result) as a
# stacked bar chart with ggplot.
#
# var_fevd: a "varfevd" object — a named list of matrices, one per variable,
#           columns = shocks, rows = forecast horizons
# n.ahead:  optional horizon cutoff; if NULL (default) the full horizon is shown
# title:    plot title; parameterized (was hard-coded) with a default that
#           preserves the original behavior for existing callers
ggfevd <- function(var_fevd, n.ahead = NULL,
                   title = "Short-Run Forecast Error Variance Decomposition") {

    # arrange FEVD data into a tibble to be used with ggplot
    var_fevd_tbl <-
        var_fevd %>%
        modify_depth(1, as_tibble) %>%
        map_df(bind_rows, .id = "variable") %>%
        gather(shock, value, -variable) %>%
        group_by(shock, variable) %>%
        mutate(horizon = row_number()) %>%
        ungroup() %>%
        # label shocks by their structural interpretation
        mutate(shock = recode(shock, dlOPHNFB = "technology", dlHOANBS = "demand"))

    if (!is.null(n.ahead)) var_fevd_tbl %<>% filter(horizon <= n.ahead)

    # plot FEVD using ggplot
    # NOTE(review): wesanderson is called but never loaded above — it must be
    # installed for this to work; a gray-scale fallback is kept commented out
    g2 <- ggplot(data = var_fevd_tbl, aes(x = horizon, y = value, fill = shock)) +
        geom_col(position = position_stack(reverse = TRUE)) +
        scale_fill_manual(values = wesanderson::wes_palette("GrandBudapest1")[c(3, 2, 4, 1)]) +
        # scale_fill_manual(values = c("gray80", "gray60", "gray40", "gray20")) +
        labs(x = "horizon", y = "fraction of overall variance", title = title) +
        facet_grid(variable ~ .)
    g2
}
# reduced-form FEVD with ggplot, and an interactive plotly version
ggfevd(mod_fevd)

ggplotly(ggfevd(mod_fevd))
# long run: FEVD based on the structural (Blanchard-Quah identified) model
plot(fevd(mod_svar, n.ahead = 40), addbars = 10)

# Same as above, but using ggplot: FEVD of the structural model, 40 quarters
mod_svar_fevd <- fevd(mod_svar, n.ahead = 40)

# reshape the FEVD list (one matrix per variable) into a long tibble:
# one row per (variable, shock, horizon) with the variance fraction in `value`
svar_fevd_tbl <-
    mod_svar_fevd %>%
    modify_depth(1, as_tibble) %>%
    map_df(bind_rows, .id = "variable") %>%
    gather(shock, value, -variable) %>%
    group_by(shock, variable) %>%
    mutate(horizon = row_number()) %>%
    ungroup() %>%
    mutate(shock = recode(shock, dlOPHNFB = "technology", dlHOANBS = "demand"))

# stacked bar chart of the variance shares, one facet per variable
g1 <- svar_fevd_tbl %>%
    ggplot(aes(x = horizon, y = value, fill = shock)) +
    geom_col(position = position_stack(reverse = TRUE)) +
    scale_fill_manual(values = c("gray80", "gray40")) +
    labs(x = "horizon", y = "fraction of overall variance",
         title = "Long-Run Forecast Error Variance Decomposition") +
    facet_wrap(variable ~ ., ncol = 1)
g1

# interactive version
ggplotly(g1)

Comments: 1) In the short run: For hours worked (dlHOANBS), the demand shock is more important — roughly 10% of the volatility of dlHOANBS is explained by the technology shock, and the remaining 90% is due to the demand shock. For productivity (dlOPHNFB), the technology shock is more important — roughly 90% of the volatility of dlOPHNFB is explained by the technology shock, and the remaining 10% is due to the demand shock. 2) In the long run: For hours worked (dlHOANBS), the demand shock is more important — roughly 16% of the volatility of dlHOANBS is explained by the technology shock, and the remaining 84% is due to the demand shock. For productivity (dlOPHNFB), the technology shock is more important — roughly 64% of the volatility of dlOPHNFB is explained by the technology shock, and the remaining 36% is due to the demand shock.